Packages and custom functions

The lnRR_func function is here used to calculate a log response ratio (lnRR) adjusted for small sample sizes. In addition, this formula accounts for correlated samples. For more details, see Doncaster and Spake (2018) Correction for bias in meta-analysis of little-replicated studies. Methods in Ecology and Evolution; 9:634-644

# packages
library(tidyverse)
library(googlesheets4)
library(here)
library(metafor)
library(metaAidR)
library(orchaRd)
library(ape)
library(clubSandwich)
library(metaAidR)
library(patchwork)
library(emmeans)
library(kableExtra)
library(GGally)

# custom functions - here this is to get small sample size corrected lnRR also,
# the correlated samples - MS should put formulas of what we used and assumptions
# averaged
lnRR_func <- function(Mc, Nc, Me, Ne, aCV2c, aCV2e, rho = 0.8) {
    lnRR <- log(Me/Mc) + 0.5 * ((aCV2e/Ne) - (aCV2c/Nc))
    
    var_lnRR <- (aCV2c/Nc) + (aCV2e/Ne) + rho * (sqrt(aCV2c) * sqrt(aCV2e)/(Nc + 
        Ne))
    
    data.frame(lnRR, var_lnRR)
}

# Mc: Concentration of PFAS of the raw (control) sample Nc: Sample size of the
# raw (control) sample Me: Concentration of PFAS of the cooked (experimental)
# sample Ne: Sample size of the cooked (experimental) sample aCV2c: Mean
# coefficient of variation of the raw (control) samples aCV2e: Mean coefficient
# of variation of the cooked (experimental) samples

Data import and processing

Import and process raw data

Import raw data

raw_data <- read_sheet("https://docs.google.com/spreadsheets/d/1cbmYDfIc2dxHJxBaowojUZZkN31NW4sL_pHw0t9eTTU/edit#gid=477880397", 
    range = "Data_extraction_2", skip = 1, col_types = "ccncccccncncccccnncccnccnncncnccnnncncncccccccc")  # Import raw data

Process raw data

processed_data <- filter(raw_data, !PFAS_type == "PFOS_Total")
processed_data <- filter(processed_data, !Species_common == "Fish cake")

write.csv(processed_data, here("data", "pilot_data_preprocessed.csv"), row.names = F)

Load processed data

processed_data <- read_csv(here("data", "pilot_data_preprocessed.csv")) 

processed_data
## # A tibble: 512 x 48
##    Study_ID Author_year  Publication_year Country_firstAuthor Effect_ID
##    <chr>    <chr>                   <dbl> <chr>               <chr>    
##  1 F001     Alves_2017               2017 Portugal            E001     
##  2 F001     Alves_2017               2017 Portugal            E002     
##  3 F002     Barbosa_2018             2018 Portugal            E003     
##  4 F002     Barbosa_2018             2018 Portugal            E004     
##  5 F002     Barbosa_2018             2018 Portugal            E005     
##  6 F002     Barbosa_2018             2018 Portugal            E006     
##  7 F002     Barbosa_2018             2018 Portugal            E007     
##  8 F002     Barbosa_2018             2018 Portugal            E008     
##  9 F002     Barbosa_2018             2018 Portugal            E009     
## 10 F002     Barbosa_2018             2018 Portugal            E010     
## # ... with 502 more rows, and 43 more variables: Species_common <chr>,
## #   Species_Scientific <chr>, Invertebrate_vertebrate <chr>,
## #   Fish_mollusc <chr>, Moisture_loss_in_percent <dbl>, PFAS_type <chr>,
## #   PFAS_carbon_chain <dbl>, linear_total <chr>, Choice_of_9 <chr>,
## #   Cooking_method <chr>, Cooking_Category <chr>, Comments_cooking <chr>,
## #   Temperature_in_Celsius <dbl>, Length_cooking_time_in_s <dbl>, Water <chr>,
## #   Oil <chr>, Oil_type <chr>, Volume_liquid_ml <dbl>, Cohort_ID <chr>,
## #   Cohort_comment <chr>, Nc <dbl>, Pooled_Nc <dbl>, Unit_PFAS_conc <chr>,
## #   Mc <dbl>, Mc_comment <chr>, Sc <dbl>, sd <chr>,
## #   Sc_technical_biological <chr>, Ne <dbl>, Pooled_Ne <dbl>, Me <dbl>,
## #   Me_comment <chr>, Se <dbl>, Se_technical_biological <chr>,
## #   If_technical_how_many <dbl>, Unit_LOD_LOQ <chr>, LOD <chr>, LOQ <chr>,
## #   Design <chr>, DataSource <chr>, Raw_data_provided <chr>,
## #   General_comments <chr>, checked <chr>
# creating SD for just biological
# not quite sure why if_else does not work but ifesle does (handling NA???)

dat <- processed_data %>% mutate(SDc = ifelse(Sc_technical_biological == "biological", Sc, NA), # Calculate the SD of biological replicates for control samples
                                     SDe = ifelse(Se_technical_biological == "biological", Se, NA)) # Calculate the SD of biological replicates for experimental samples

Import phylogenetic information and calculate phylogenetic variance-covariance matrix

The phylogenetic tree was generated in the tree_cooked_fish_MA.Rmd document

tree <- read.tree(here("data", "plot_cooked_fish_MA.tre"))  # Import phylogenetic tree (see tree_cooked_fish_MA.Rmd for more details) 

tree <- compute.brlen(tree)  # Generate branch lengths 

cor_tree <- vcv(tree, corr = T)  # Generate phylogenetic variance-covariance matrix 

dat$Phylogeny <- str_replace(dat$Species_Scientific, " ", "_")  # Add the `phylogeny` column to the data frame

colnames(cor_tree) %in% dat$Phylogeny  # Check correspondence between tip names and data frame
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
plot(tree)

Calculate effect sizes

The average coefficient of variation in PFAS concentration was calculated for each study and treatment, according to Doncaster and Spake (2018) Correction for bias in meta-analysis of little-replicated studies. Methods in Ecology and Evolution; 9:634-644. Then, these values were averaged across studies and used to calculate the lnRR corrected for small sample sizes (for formula, see the lnRR_func above)

aCV2 <- dat %>% 
               group_by(Study_ID) %>%  # Group by study 
                                     summarise(CV2c = mean((SDc/Mc)^2, na.rm = T),  # Calculate the squared coefficient of variation for control and experimental groups
                                               CV2e = mean((SDe/Me)^2, na.rm = T)) %>% 
                                                                                      ungroup() %>% # ungroup 
                                                                                                   summarise(aCV2c = mean(CV2c, na.rm = T), # Mean CV^2 for exp and control groups across studies
                                                                                                             aCV2e = mean(CV2e, na.rm = T)) 

effect <- lnRR_func(Mc = dat$Mc, 
                    Nc = dat$Nc, 
                    Me = dat$Me, 
                    Ne = dat$Ne, 
                    aCV2c = aCV2[[1]], 
                    aCV2e = aCV2[[2]],
                    rho = 0.8)  # Calculate effect sizes

dat <- dat %>% 
             mutate(N_tilde = (Nc*Ne)/(Nc + Ne)) # Calculate the effective sample size

dat <- cbind(dat, effect) # Merge effect sizes with the data frame

VCV_lnRR <- make_VCV_matrix(dat, V = "var_lnRR", cluster = "Cohort_ID", obs = "Effect_ID", rho = 0.5) # Because some effect sizes share the same control, we generated a variance-covariance matrix to account for correlated errors (i.e. effectively dividing the weight of the correlated estimates by half)

Distribution of effect sizes

# mean
ggplot(dat, aes(x = lnRR)) + geom_histogram(fill = "salmon", col = "black", binwidth = 0.2) + 
    theme_classic()

# variance
ggplot(dat, aes(x = var_lnRR)) + geom_histogram(fill = "salmon", col = "black", binwidth = 0.05) + 
    theme_classic()

# log variance
ggplot(dat, aes(x = var_lnRR)) + geom_histogram(fill = "salmon", col = "black", binwidth = 0.05) + 
    scale_x_log10() + theme_classic()

Sample sizes

Table of sample sizes

dat %>%
       summarise( # Calculate the number of effect sizes, studies and species for the main categorical variables
                 `Studies` = n_distinct(Study_ID),
                 `Species` = n_distinct(Species_common),
                 `PFAS type` = n_distinct(PFAS_type),
                 `Cohorts` = n_distinct(Cohort_ID),
                 `Effect sizes` = n_distinct(Effect_ID),
    
                 `Effect sizes (Oil-based)` = n_distinct(Effect_ID[Cooking_Category=="oil-based"]),
                 `Studies (Oil-based)` = n_distinct(Study_ID[Cooking_Category=="oil-based"]),
                 `Species (Oil-based)` = n_distinct(Species_common[Cooking_Category=="oil-based"]),

                 `Effect sizes (Water-based)` = n_distinct(Effect_ID[Cooking_Category=="water-based"]),
                 `Studies (Water-based)` = n_distinct(Study_ID[Cooking_Category=="water-based"]),
                 `Species (Water-based)` = n_distinct(Species_common[Cooking_Category=="water-based"]),

                 `Effect sizes (No liquid)` = n_distinct(Effect_ID[Cooking_Category=="No liquid"]),
                 `Studies (No liquid)` = n_distinct(Study_ID[Cooking_Category=="No liquid"]),
                 `Species (No liquid)` = n_distinct(Species_common[Cooking_Category=="No liquid"]),) -> table_sample_sizes

table_sample_sizes<-t(table_sample_sizes)
colnames(table_sample_sizes)<-"n (sample size)"
kable(table_sample_sizes) %>% kable_styling("striped", position="left")
n (sample size)
Studies 10
Species 39
PFAS type 18
Cohorts 153
Effect sizes 512
Effect sizes (Oil-based) 303
Studies (Oil-based) 7
Species (Oil-based) 28
Effect sizes (Water-based) 140
Studies (Water-based) 8
Species (Water-based) 23
Effect sizes (No liquid) 69
Studies (No liquid) 2
Species (No liquid) 14

Summary of the dataset

kable(summary(dat), "html") %>% kable_styling("striped", position = "left") %>% scroll_box(width = "100%", 
    height = "500px")
Study_ID Author_year Publication_year Country_firstAuthor Effect_ID Species_common Species_Scientific Invertebrate_vertebrate Fish_mollusc Moisture_loss_in_percent PFAS_type PFAS_carbon_chain linear_total Choice_of_9 Cooking_method Cooking_Category Comments_cooking Temperature_in_Celsius Length_cooking_time_in_s Water Oil Oil_type Volume_liquid_ml Cohort_ID Cohort_comment Nc Pooled_Nc Unit_PFAS_conc Mc Mc_comment Sc sd Sc_technical_biological Ne Pooled_Ne Me Me_comment Se Se_technical_biological If_technical_how_many Unit_LOD_LOQ LOD LOQ Design DataSource Raw_data_provided General_comments checked SDc SDe Phylogeny N_tilde lnRR var_lnRR
Length:512 Length:512 Min. :2008 Length:512 Length:512 Length:512 Length:512 Length:512 Length:512 Min. : 6.77 Length:512 Min. : 3.000 Length:512 Length:512 Length:512 Length:512 Length:512 Min. : 75.0 Min. : 120.0 Length:512 Length:512 Length:512 Min. : 5 Length:512 Length:512 Min. : 1.00 Min. :1.000 Length:512 Min. : 0.002 Length:512 Min. : 0.0010 Length:512 Length:512 Min. : 1.00 Min. :1.000 Min. : 0.0020 Length:512 Min. : 0.000 Length:512 Min. :1.000 Length:512 Length:512 Length:512 Length:512 Length:512 Length:512 Length:512 Length:512 Min. : 0.0010 Min. : 0.0010 Length:512 Min. : 0.500 Min. :-6.0350 Min. :0.02396
Class :character Class :character 1st Qu.:2014 Class :character Class :character Class :character Class :character Class :character Class :character 1st Qu.:14.45 Class :character 1st Qu.: 8.000 Class :character Class :character Class :character Class :character Class :character 1st Qu.:100.0 1st Qu.: 600.0 Class :character Class :character Class :character 1st Qu.: 11 Class :character Class :character 1st Qu.: 5.00 1st Qu.:1.000 Class :character 1st Qu.: 0.160 Class :character 1st Qu.: 0.0010 Class :character Class :character 1st Qu.: 5.00 1st Qu.:1.000 1st Qu.: 0.0940 Class :character 1st Qu.: 0.001 Class :character 1st Qu.:1.000 Class :character Class :character Class :character Class :character Class :character Class :character Class :character Class :character 1st Qu.: 0.0354 1st Qu.: 0.0585 Class :character 1st Qu.: 2.500 1st Qu.:-0.8778 1st Qu.:0.14375
Mode :character Mode :character Median :2019 Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Median :18.35 Mode :character Median : 8.000 Mode :character Mode :character Mode :character Mode :character Mode :character Median :160.0 Median : 600.0 Mode :character Mode :character Mode :character Median : 300 Mode :character Mode :character Median :10.00 Median :1.000 Mode :character Median : 0.298 Mode :character Median : 0.0100 Mode :character Mode :character Median :10.00 Median :1.000 Median : 0.2285 Mode :character Median : 0.020 Mode :character Median :3.000 Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Median : 0.1580 Median : 0.1461 Mode :character Median : 5.000 Median :-0.1671 Median :0.14375
NA NA Mean :2017 NA NA NA NA NA NA Mean :21.04 NA Mean : 8.994 NA NA NA NA NA Mean :161.3 Mean : 733.3 NA NA NA Mean : 2304 NA NA Mean :11.49 Mean :2.371 NA Mean : 3.494 NA Mean : 1.7676 NA NA Mean :11.49 Mean :2.436 Mean : 3.2321 NA Mean : 1.822 NA Mean :2.481 NA NA NA NA NA NA NA NA Mean : 4.4069 Mean : 4.4491 NA Mean : 5.744 Mean :-0.3631 Mean :0.20045
NA NA 3rd Qu.:2019 NA NA NA NA NA NA 3rd Qu.:21.31 NA 3rd Qu.:11.000 NA NA NA NA NA 3rd Qu.:175.0 3rd Qu.: 900.0 NA NA NA 3rd Qu.: 300 NA NA 3rd Qu.:10.00 3rd Qu.:5.000 NA 3rd Qu.: 1.083 NA 3rd Qu.: 0.1185 NA NA 3rd Qu.:10.00 3rd Qu.:5.000 3rd Qu.: 1.0505 NA 3rd Qu.: 0.130 NA 3rd Qu.:3.000 NA NA NA NA NA NA NA NA 3rd Qu.: 0.5600 3rd Qu.: 0.6516 NA 3rd Qu.: 5.000 3rd Qu.: 0.1849 3rd Qu.:0.28750
NA NA Max. :2020 NA NA NA NA NA NA Max. :79.11 NA Max. :14.000 NA NA NA NA NA Max. :300.0 Max. :1500.0 NA NA NA Max. :59777 NA NA Max. :60.00 Max. :6.000 NA Max. :86.689 NA Max. :133.7000 NA NA Max. :60.00 Max. :6.000 Max. :134.4379 NA Max. :130.500 NA Max. :4.000 NA NA NA NA NA NA NA NA Max. :133.7000 Max. :130.5000 NA Max. :30.000 Max. : 3.4622 Max. :1.43748
NA NA NA NA NA NA NA NA NA NA’s :284 NA NA NA NA NA NA NA NA’s :6 NA’s :56 NA NA NA NA’s :73 NA NA NA NA NA NA NA NA’s :53 NA NA NA NA NA NA NA’s :55 NA NA’s :198 NA NA NA NA NA NA NA NA NA’s :330 NA’s :328 NA NA NA NA

Intercept meta-analytical model

Determine the random effect structure

Cohort_ID explains virtually no variance in the model. Hence, it was removed from the model. All the other random effects explained significant variance and were kept in subsequent models

MA_all_rand_effects <- rma.mv(lnRR, VCV_lnRR, # Add `VCV_lnRR` to account for correlated errors errors between cohorts (shared_controls)
              random = list(~1|Study_ID, # Identity of the study
                            ~1|Phylogeny, # Phylogenetic correlation
                            ~1|Cohort_ID, # Identity of the cohort (shared controls)
                            ~1|Species_common, # Non-phylogenetic correlation between species
                            ~1|PFAS_type, # Type of PFAS 
                            ~1|Effect_ID), # Effect size identity 
              R= list(Phylogeny = cor_tree), # Assign the 'Phylogeny' argument to the phylogenetic variance-covariance matrix
              test = "t", 
              data = dat)

summary(MA_all_rand_effects) # Cohort ID does not explain any variance 
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -634.9177  1269.8353  1283.8353  1313.4899  1284.0580   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5844  0.7645     10     no        Study_ID   no 
## sigma^2.2  0.0092  0.0960     38     no       Phylogeny  yes 
## sigma^2.3  0.0000  0.0004    153     no       Cohort_ID   no 
## sigma^2.4  0.2081  0.4562     39     no  Species_common   no 
## sigma^2.5  0.1009  0.3177     18     no       PFAS_type   no 
## sigma^2.6  0.4877  0.6984    512     no       Effect_ID   no 
## 
## Test for Heterogeneity:
## Q(df = 511) = 7278.2801, p-val < .0001
## 
## Model Results:
## 
## estimate      se     tval    pval    ci.lb   ci.ub 
##  -0.3142  0.2917  -1.0770  0.2820  -0.8874  0.2589    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Intercept meta-analytical model and percentage of heterogeneity

MA_model <- rma.mv(lnRR, VCV_lnRR, 
              random = list(~1|Study_ID,
                            ~1|Phylogeny, # Removed Cohort_ID
                            ~1|Species_common, 
                            ~1|PFAS_type, 
                            ~1|Effect_ID), 
              R= list(Phylogeny = cor_tree), 
              test = "t", 
              data = dat)

summary(MA_model)
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -634.9176  1269.8353  1281.8353  1307.2535  1282.0020   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5844  0.7645     10     no        Study_ID   no 
## sigma^2.2  0.0092  0.0959     38     no       Phylogeny  yes 
## sigma^2.3  0.2081  0.4562     39     no  Species_common   no 
## sigma^2.4  0.1009  0.3177     18     no       PFAS_type   no 
## sigma^2.5  0.4877  0.6984    512     no       Effect_ID   no 
## 
## Test for Heterogeneity:
## Q(df = 511) = 7278.2801, p-val < .0001
## 
## Model Results:
## 
## estimate      se     tval    pval    ci.lb   ci.ub 
##  -0.3142  0.2917  -1.0771  0.2819  -0.8874  0.2589    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
i2_ml(MA_model) # Percentage of heterogeneity explained by each random effect
##          I2_total       I2_Study_ID      I2_Phylogeny I2_Species_common 
##       0.917318637       0.385600355       0.006068127       0.137303674 
##      I2_PFAS_type      I2_Effect_ID 
##       0.066572256       0.321774224
# plot
orchard_plot(MA_model, mod = "Int", xlab = "lnRR", alpha=0.4) +  # Orchard plot 
           geom_errorbarh(aes(xmin = lowerPR, xmax = upperPR), height = 0, show.legend = FALSE, size = 1.1, alpha = 0.5)+ # prediction intervals
           geom_errorbarh(aes(xmin = lowerCL, xmax = upperCL), height = 0, show.legend = FALSE, size = 2)+ # confidence intervals
           geom_point(aes(fill = name),  size = 5, shape = 21)+ # mean estimate
           scale_colour_manual(values = "darkorange")+ # change colours
           scale_fill_manual(values="darkorange")+ 
           scale_size_continuous(range = c(1, 7))+ # change point scaling
           theme(panel.border = element_rect(colour = "black", fill=NA, size=1.3), # border around the plot
                 text = element_text(size = 24), # change font sizes
                 legend.title = element_text(size = 15),
                 legend.text = element_text(size = 13)) 

save(MA_model, MA_all_rand_effects, file = here("Rdata", "int_MA_models.RData")) # save the models 

Meta-regressions

Function to run all models with the same structure

run_model<-function(data,formula){
  data<-as.data.frame(data) # convert data set into a data frame to calculate VCV matrix 
  VCV<-make_VCV_matrix(data, V = "var_lnRR", cluster = "Cohort_ID", obs = "Effect_ID", rho = 0.5) # create VCV matrix for the specified data
  
  rma.mv(lnRR, VCV, # run the model, as described earlier
         mods=formula,
         random = list(~1|Study_ID,
                       ~1|Phylogeny, 
                       ~1|Species_common, 
                       ~1|PFAS_type, 
                       ~1|Effect_ID), 
         R= list(Phylogeny = cor_tree), 
         test = "t", 
         data = data)
}

Function to run plots with the same structure

plot_continuous<-function(data, model, moderator, xlab){

pred<-predict.rma(model)

data %>% mutate(fit=pred$pred, 
               ci.lb=pred$ci.lb,
               ci.ub=pred$ci.ub,
               pr.lb=pred$cr.lb,
               pr.ub=pred$cr.ub) %>% 
ggplot(aes(x = moderator, y = lnRR)) +
     geom_ribbon(aes(ymin = pr.lb, ymax = pr.ub, color = NULL), alpha = .075) +
     geom_ribbon(aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = .2) +
     geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
     scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
     geom_line(aes(y = fit), size = 1.5)+  
  labs(x = xlab, y = "lnRR", size = "Precison (1/SE)") +
  theme_bw() +
  scale_size_continuous(range=c(1,9))+
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))
}

Single-moderator models

All continuous variables were z-transformed

Cooking category

# Cooking_Category

category_model<-run_model(dat, ~Cooking_Category-1)
  
summary(category_model)
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -631.9971  1263.9942  1279.9942  1313.8537  1280.2822   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5905  0.7685     10     no        Study_ID   no 
## sigma^2.2  0.0023  0.0481     38     no       Phylogeny  yes 
## sigma^2.3  0.2116  0.4600     39     no  Species_common   no 
## sigma^2.4  0.1023  0.3198     18     no       PFAS_type   no 
## sigma^2.5  0.4881  0.6986    512     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 509) = 7233.4707, p-val < .0001
## 
## Test of Moderators (coefficients 1:3):
## F(df1 = 3, df2 = 509) = 1.0533, p-val = 0.3686
## 
## Model Results:
## 
##                              estimate      se     tval    pval    ci.lb   ci.ub 
## Cooking_CategoryNo liquid     -0.2018  0.3125  -0.6457  0.5187  -0.8159  0.4122 
## Cooking_Categoryoil-based     -0.3712  0.2971  -1.2495  0.2121  -0.9548  0.2124 
## Cooking_Categorywater-based   -0.2932  0.2950  -0.9939  0.3207  -0.8729  0.2864 
##  
## Cooking_CategoryNo liquid 
## Cooking_Categoryoil-based 
## Cooking_Categorywater-based 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(category_model)
##   R2_marginal R2_coditional 
##   0.002563516   0.650990549
# plot
orchard_plot(category_model, mod = "Cooking_Category", xlab = "lnRR", alpha=0.4)+
           geom_errorbarh(aes(xmin = lowerPR, xmax = upperPR), height = 0, show.legend = FALSE, size = 1.1, alpha = 0.5)+ # prediction intervals
           geom_errorbarh(aes(xmin = lowerCL, xmax = upperCL), height = 0, show.legend = FALSE, size = 2)+ # confidence intervals
           geom_point(aes(fill = name),  size = 5, shape = 21)+ # mean estimate
           scale_colour_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3"))+ # change colours
           scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+ 
           scale_size_continuous(range = c(1, 7))+ # change point scaling
           theme(panel.border = element_rect(colour = "black", fill=NA, size=1.3), # border around the plot
                 text = element_text(size = 24), # change font sizes
                 legend.title = element_text(size = 15),
                 legend.text = element_text(size = 13))

PFAS carbon chain length

# PFAS_carbon_chain

PFAS_model <- run_model(dat, ~PFAS_carbon_chain)

summary(PFAS_model)
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -633.4258  1266.8515  1280.8515  1310.4924  1281.0746   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5830  0.7636     10     no        Study_ID   no 
## sigma^2.2  0.0106  0.1028     38     no       Phylogeny  yes 
## sigma^2.3  0.2085  0.4566     39     no  Species_common   no 
## sigma^2.4  0.1061  0.3257     18     no       PFAS_type   no 
## sigma^2.5  0.4880  0.6985    512     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 510) = 7223.9798, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 510) = 0.1431, p-val = 0.7054
## 
## Model Results:
## 
##                    estimate      se     tval    pval    ci.lb   ci.ub 
## intrcpt             -0.4218  0.4087  -1.0322  0.3024  -1.2247  0.3810    
## PFAS_carbon_chain    0.0119  0.0315   0.3783  0.7054  -0.0499  0.0738    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(PFAS_model)
##   R2_marginal R2_coditional 
##  0.0005515213  0.6506803497
plot_continuous(dat, PFAS_model, dat$PFAS_carbon_chain, "PFAS carbon chain length")

Cooking temperature

# Temperature_in_Celsius

temp_model <- run_model(dat, ~scale(Temperature_in_Celsius))  # z-transformed 

summary(temp_model)
## 
## Multivariate Meta-Analysis Model (k = 506; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -626.2464  1252.4927  1266.4927  1296.0508  1266.7185   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5805  0.7619     10     no        Study_ID   no 
## sigma^2.2  0.0054  0.0735     38     no       Phylogeny  yes 
## sigma^2.3  0.2079  0.4559     39     no  Species_common   no 
## sigma^2.4  0.0974  0.3122     18     no       PFAS_type   no 
## sigma^2.5  0.4896  0.6997    506     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 504) = 7121.6638, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 504) = 0.0318, p-val = 0.8586
## 
## Model Results:
## 
##                                estimate      se     tval    pval    ci.lb 
## intrcpt                         -0.3001  0.2911  -1.0309  0.3031  -0.8719 
## scale(Temperature_in_Celsius)    0.0144  0.0810   0.1783  0.8586  -0.1447 
##                                 ci.ub 
## intrcpt                        0.2718    
## scale(Temperature_in_Celsius)  0.1736    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(temp_model)
##   R2_marginal R2_coditional 
##   0.000150956   0.645470163
# Plot
dat.temp <- filter(dat, Temperature_in_Celsius != "NA")
plot_continuous(dat.temp, temp_model, dat.temp$Temperature_in_Celsius, "Cooking temperature")

Cooking time

# Length_cooking_time_in_s

time_model <- run_model(dat, ~scale(Length_cooking_time_in_s))  # z-transformed

summary(time_model)
## 
## Multivariate Meta-Analysis Model (k = 456; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -525.7357  1051.4714  1065.4714  1094.2980  1065.7225   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5387  0.7340      9     no        Study_ID   no 
## sigma^2.2  0.0000  0.0001     30     no       Phylogeny  yes 
## sigma^2.3  0.1425  0.3775     30     no  Species_common   no 
## sigma^2.4  0.1009  0.3176     17     no       PFAS_type   no 
## sigma^2.5  0.3964  0.6296    456     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 454) = 3999.2874, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 454) = 24.7942, p-val < .0001
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## intrcpt                           -0.5531  0.2884  -1.9175  0.0558  -1.1199 
## scale(Length_cooking_time_in_s)   -0.2646  0.0531  -4.9794  <.0001  -0.3690 
##                                    ci.ub 
## intrcpt                           0.0138    . 
## scale(Length_cooking_time_in_s)  -0.1601  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(time_model)
##   R2_marginal R2_coditional 
##    0.05605974    0.68251140
# Plot
dat.time <- filter(dat, Length_cooking_time_in_s != "NA")
plot_continuous(dat.time, time_model, dat.time$Length_cooking_time_in_s, "Cooking time (s)")

Volume of liquid

# Volume_liquid_ml

volume_model <- run_model(dat, ~scale(log(Volume_liquid_ml)))  # logged and z-transformed

summary(volume_model)
## 
## Multivariate Meta-Analysis Model (k = 439; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -552.0542  1104.1084  1118.1084  1146.6680  1118.3695   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5079  0.7126      8     no        Study_ID   no 
## sigma^2.2  0.0048  0.0692     34     no       Phylogeny  yes 
## sigma^2.3  0.1498  0.3870     35     no  Species_common   no 
## sigma^2.4  0.1177  0.3431     18     no       PFAS_type   no 
## sigma^2.5  0.5153  0.7178    439     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 437) = 5805.2399, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 437) = 5.9117, p-val = 0.0154
## 
## Model Results:
## 
##                               estimate      se     tval    pval    ci.lb 
## intrcpt                        -0.3568  0.2978  -1.1981  0.2315  -0.9421 
## scale(log(Volume_liquid_ml))   -0.2543  0.1046  -2.4314  0.0154  -0.4599 
##                                 ci.ub 
## intrcpt                        0.2285    
## scale(log(Volume_liquid_ml))  -0.0487  * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(volume_model)
##   R2_marginal R2_coditional 
##     0.0475590     0.6211531
# Plot
dat.volume <- filter(dat, Volume_liquid_ml != "NA")
plot_continuous(dat.volume, volume_model, log(dat.volume$Volume_liquid_ml), "Volume of liquid (mL)")

Percentage of moisture loss

# Moisture_loss_in_percent

moisture_model <- run_model(dat, ~scale(Moisture_loss_in_percent))

summary(moisture_model)
## 
## Multivariate Meta-Analysis Model (k = 228; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -234.1714   468.3428   482.3428   506.2865   482.8566   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.0775  0.2785      6     no        Study_ID   no 
## sigma^2.2  0.2316  0.4812     18     no       Phylogeny  yes 
## sigma^2.3  0.1307  0.3615     18     no  Species_common   no 
## sigma^2.4  0.0094  0.0968     17     no       PFAS_type   no 
## sigma^2.5  0.3220  0.5674    228     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 226) = 2492.6080, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 226) = 0.0295, p-val = 0.8638
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## intrcpt                            0.5347  0.3311   1.6147  0.1078  -0.1178 
## scale(Moisture_loss_in_percent)   -0.0117  0.0683  -0.1717  0.8638  -0.1463 
##                                   ci.ub 
## intrcpt                          1.1872    
## scale(Moisture_loss_in_percent)  0.1229    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(moisture_model)
##   R2_marginal R2_coditional 
##  0.0001783538  0.5825498843
# Plot
dat.moisture <- filter(dat, Moisture_loss_in_percent != "NA")
plot_continuous(dat.moisture, moisture_model, dat.moisture$Moisture_loss_in_percent, 
    "Percentage of moisture loss")

save(category_model, PFAS_model, temp_model, time_model, volume_model, moisture_model, 
    file = here("Rdata", "single_mod_models.RData"))  # Save models

Full model

# Full_model

full_model <- run_model(dat, ~-1 + Cooking_Category + scale(Temperature_in_Celsius) + 
    scale(Length_cooking_time_in_s) + scale(PFAS_carbon_chain) + scale(log(Volume_liquid_ml)))
summary(full_model)
## 
## Multivariate Meta-Analysis Model (k = 399; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -442.1936   884.3873   908.3873   956.0424   909.2105   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.0835  0.2890      7     no        Study_ID   no 
## sigma^2.2  0.0000  0.0001     26     no       Phylogeny  yes 
## sigma^2.3  0.0941  0.3067     26     no  Species_common   no 
## sigma^2.4  0.1212  0.3481     17     no       PFAS_type   no 
## sigma^2.5  0.3829  0.6188    399     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 392) = 2935.7864, p-val < .0001
## 
## Test of Moderators (coefficients 1:7):
## F(df1 = 7, df2 = 392) = 13.2854, p-val < .0001
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## Cooking_CategoryNo liquid         -0.5772  0.2836  -2.0356  0.0425  -1.1348 
## Cooking_Categoryoil-based         -0.7378  0.1891  -3.9010  0.0001  -1.1097 
## Cooking_Categorywater-based       -0.4054  0.2202  -1.8409  0.0664  -0.8384 
## scale(Temperature_in_Celsius)     -0.0147  0.1119  -0.1317  0.8953  -0.2348 
## scale(Length_cooking_time_in_s)   -0.4005  0.0577  -6.9370  <.0001  -0.5140 
## scale(PFAS_carbon_chain)           0.0619  0.0799   0.7746  0.4390  -0.0952 
## scale(log(Volume_liquid_ml))      -0.7214  0.1027  -7.0271  <.0001  -0.9233 
##                                    ci.ub 
## Cooking_CategoryNo liquid        -0.0197    * 
## Cooking_Categoryoil-based        -0.3660  *** 
## Cooking_Categorywater-based       0.0276    . 
## scale(Temperature_in_Celsius)     0.2053      
## scale(Length_cooking_time_in_s)  -0.2870  *** 
## scale(PFAS_carbon_chain)          0.2189      
## scale(log(Volume_liquid_ml))     -0.5196  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r2_ml(full_model)
##   R2_marginal R2_coditional 
##     0.4600194     0.6967281
save(full_model, file = here("Rdata", "full_model.RData"))

Check collinearity of predictors

## Check for collinerarity - seems fine

vif(full_model)
##       Cooking_CategoryNo liquid       Cooking_Categoryoil-based 
##                          1.6588                          3.3203 
##     Cooking_Categorywater-based   scale(Temperature_in_Celsius) 
##                          4.1165                          2.3290 
## scale(Length_cooking_time_in_s)        scale(PFAS_carbon_chain) 
##                          1.0993                          1.0018 
##    scale(log(Volume_liquid_ml)) 
##                          1.3527
dat %>% select(Temperature_in_Celsius, Length_cooking_time_in_s, PFAS_carbon_chain, 
    Volume_liquid_ml) %>% ggpairs()

Conditional analyses

Inspection of the plots highlighted potential significant decreases in PFAS content with increased cooking time and volume of cooking. Hence, here we used emmeans (download from remotes::install_github(“rvlenth/emmeans”, dependencies = TRUE, build_opts = "")) to generate marginalised means at specified values of the different predictors. Such analysis enable the quantification of the mean effect size after controlling for different values of the moderators.

Full model

# Full model in original units (not z-transformation)
dat$log_Volume_liquid_ml <- log(dat$Volume_liquid_ml)

full_model_org_units <- run_model(dat, ~-1 + Cooking_Category + Temperature_in_Celsius + 
    Length_cooking_time_in_s + PFAS_carbon_chain + log_Volume_liquid_ml)


save(full_model, file = here("Rdata", "full_model_org_units.RData"))

Overall marginalised mean

res_mean_values <- qdrg(object = full_model_org_units, data = dat)  # Generate an grid of marginal means at the mean of each predictor variable
res_mean_values
## 'emmGrid' object with variables:
##     Cooking_Category = No liquid, oil-based, water-based
##     Temperature_in_Celsius = 162.42
##     Length_cooking_time_in_s = 726.77
##     PFAS_carbon_chain = 9.0226
##     log_Volume_liquid_ml = 4.8171
emmeans(res_mean_values, specs = ~1, df = full_model_org_units$dfs)  # Marginalised mean at the mean value of each predictor in the full model
##  1       emmean    SE  df lower.CL upper.CL
##  overall -0.556 0.196 392    -0.94   -0.171
## 
## Results are averaged over the levels of: Cooking_Category 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95
# When all predictors at set to their mean level, the mean effect size is -0.556
# (42.7% decrease in PFAS content in experimental vs. control samples)

emmeans(res_mean_values, specs = ~1, df = full_model_org_units$dfs, weights = "prop")  # Setting proportional weights returns similar results
##  1       emmean    SE  df lower.CL upper.CL
##  overall -0.613 0.182 392   -0.972   -0.254
## 
## Results are averaged over the levels of: Cooking_Category 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised means for different cooking categories

emmeans(res_mean_values, specs = "Cooking_Category", df = full_model_org_units$dfs)
##  Cooking_Category emmean    SE  df lower.CL upper.CL
##  No liquid        -0.559 0.283 392   -1.117 -0.00198
##  oil-based        -0.720 0.189 392   -1.091 -0.34864
##  water-based      -0.387 0.221 392   -0.823  0.04781
## 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised means for pre-determined cooking times

Here, we generate estimates at cooking times of 2, 5, 10, 15, 20 and 25 min.

res_cooking_time <- qdrg(object = full_model_org_units, data = dat, at = list(Length_cooking_time_in_s = c(120, 
    300, 600, 900, 1200, 1500)))
res_cooking_time
## 'emmGrid' object with variables:
##     Cooking_Category = No liquid, oil-based, water-based
##     Temperature_in_Celsius = 162.42
##     Length_cooking_time_in_s =  120,  300,  600,  900, 1200, 1500
##     PFAS_carbon_chain = 9.0226
##     log_Volume_liquid_ml = 4.8171

Marginalised mean estimate at each cooking time

emmeans(res_cooking_time, specs = ~1 | Length_cooking_time_in_s, df = full_model_org_units$dfs)
## Length_cooking_time_in_s =  120:
##   emmean    SE  df lower.CL upper.CL
##   0.2760 0.225 392   -0.166  0.71814
## 
## Length_cooking_time_in_s =  300:
##   emmean    SE  df lower.CL upper.CL
##   0.0293 0.210 392   -0.383  0.44141
## 
## Length_cooking_time_in_s =  600:
##   emmean    SE  df lower.CL upper.CL
##  -0.3818 0.196 392   -0.768  0.00387
## 
## Length_cooking_time_in_s =  900:
##   emmean    SE  df lower.CL upper.CL
##  -0.7930 0.200 392   -1.186 -0.39945
## 
## Length_cooking_time_in_s = 1200:
##   emmean    SE  df lower.CL upper.CL
##  -1.2041 0.221 392   -1.638 -0.77039
## 
## Length_cooking_time_in_s = 1500:
##   emmean    SE  df lower.CL upper.CL
##  -1.6152 0.254 392   -2.114 -1.11673
## 
## Results are averaged over the levels of: Cooking_Category 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised mean estimate for each cooking category, at each cooking time

emmeans(res_cooking_time, specs = ~Cooking_Category | Length_cooking_time_in_s, df = full_model_org_units$dfs)
## Length_cooking_time_in_s =  120:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid         0.2723 0.305 392  -0.3281    0.873
##  oil-based         0.1117 0.217 392  -0.3153    0.539
##  water-based       0.4441 0.248 392  -0.0434    0.932
## 
## Length_cooking_time_in_s =  300:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid         0.0256 0.294 392  -0.5524    0.604
##  oil-based        -0.1350 0.202 392  -0.5319    0.262
##  water-based       0.1974 0.234 392  -0.2628    0.658
## 
## Length_cooking_time_in_s =  600:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid        -0.3856 0.284 392  -0.9440    0.173
##  oil-based        -0.5462 0.189 392  -0.9175   -0.175
##  water-based      -0.2137 0.222 392  -0.6500    0.223
## 
## Length_cooking_time_in_s =  900:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid        -0.7967 0.286 392  -1.3595   -0.234
##  oil-based        -0.9573 0.194 392  -1.3388   -0.576
##  water-based      -0.6249 0.225 392  -1.0677   -0.182
## 
## Length_cooking_time_in_s = 1200:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid        -1.2078 0.300 392  -1.7985   -0.617
##  oil-based        -1.3684 0.216 392  -1.7930   -0.944
##  water-based      -1.0360 0.243 392  -1.5146   -0.557
## 
## Length_cooking_time_in_s = 1500:
##  Cooking_Category  emmean    SE  df lower.CL upper.CL
##  No liquid        -1.6190 0.325 392  -2.2578   -0.980
##  oil-based        -1.7796 0.250 392  -2.2717   -1.287
##  water-based      -1.4472 0.273 392  -1.9848   -0.909
## 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised means for different volumes of liquid

Here, we generate marginalised estimates at volumes of liquid of ~10, 500, 1000, 5000 and 10000 mL. We did not look at the means for different cooking categories because they are inherently different in the volume of liquid used.

res_volume <- qdrg(object = full_model_org_units, data = dat, at = list(log_Volume_liquid_ml = c(2.3, 
    6.2, 6.9, 8.5, 9.2)))
res_volume
## 'emmGrid' object with variables:
##     Cooking_Category = No liquid, oil-based, water-based
##     Temperature_in_Celsius = 162.42
##     Length_cooking_time_in_s = 726.77
##     PFAS_carbon_chain = 9.0226
##     log_Volume_liquid_ml = 2.3, 6.2, 6.9, 8.5, 9.2
emmeans(res_volume, specs = ~1 | log_Volume_liquid_ml, df = full_model_org_units$dfs)
## log_Volume_liquid_ml = 2.3:
##  emmean    SE  df lower.CL upper.CL
##   0.313 0.247 392   -0.172    0.798
## 
## log_Volume_liquid_ml = 6.2:
##  emmean    SE  df lower.CL upper.CL
##  -1.033 0.197 392   -1.420   -0.645
## 
## log_Volume_liquid_ml = 6.9:
##  emmean    SE  df lower.CL upper.CL
##  -1.274 0.207 392   -1.680   -0.868
## 
## log_Volume_liquid_ml = 8.5:
##  emmean    SE  df lower.CL upper.CL
##  -1.826 0.245 392   -2.309   -1.344
## 
## log_Volume_liquid_ml = 9.2:
##  emmean    SE  df lower.CL upper.CL
##  -2.068 0.268 392   -2.594   -1.541
## 
## Results are averaged over the levels of: Cooking_Category 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised means for different PFAS carbon chains

Here, we generate marginalised estimates for PFAS of 3, 6, 9 and 12 carbon chains

res_PFAS <- qdrg(object = full_model_org_units, data = dat, at = list(PFAS_carbon_chain = c(3, 
    6, 9, 12)))
res_PFAS
## 'emmGrid' object with variables:
##     Cooking_Category = No liquid, oil-based, water-based
##     Temperature_in_Celsius = 162.42
##     Length_cooking_time_in_s = 726.77
##     PFAS_carbon_chain =  3,  6,  9, 12
##     log_Volume_liquid_ml = 4.8171

Marginalised mean estimate for each PFAS carbon chain

emmeans(res_PFAS, specs = ~1 | PFAS_carbon_chain, df = full_model_org_units$dfs)
## PFAS_carbon_chain =  3:
##  emmean    SE  df lower.CL upper.CL
##  -0.715 0.288 392   -1.282  -0.1484
## 
## PFAS_carbon_chain =  6:
##  emmean    SE  df lower.CL upper.CL
##  -0.636 0.224 392   -1.076  -0.1955
## 
## PFAS_carbon_chain =  9:
##  emmean    SE  df lower.CL upper.CL
##  -0.556 0.196 392   -0.941  -0.1714
## 
## PFAS_carbon_chain = 12:
##  emmean    SE  df lower.CL upper.CL
##  -0.476 0.218 392   -0.905  -0.0476
## 
## Results are averaged over the levels of: Cooking_Category 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Marginalised mean estimate for each PFAS carbon chain, for each cooking category

emmeans(res_PFAS, specs = ~Cooking_Category | PFAS_carbon_chain, df = full_model_org_units$dfs)
## PFAS_carbon_chain =  3:
##  Cooking_Category emmean    SE  df lower.CL upper.CL
##  No liquid        -0.719 0.354 392   -1.414 -0.02401
##  oil-based        -0.880 0.283 392   -1.437 -0.32264
##  water-based      -0.547 0.307 392   -1.151  0.05648
## 
## PFAS_carbon_chain =  6:
##  Cooking_Category emmean    SE  df lower.CL upper.CL
##  No liquid        -0.640 0.304 392   -1.236 -0.04282
##  oil-based        -0.800 0.218 392   -1.228 -0.37205
##  water-based      -0.468 0.247 392   -0.954  0.01828
## 
## PFAS_carbon_chain =  9:
##  Cooking_Category emmean    SE  df lower.CL upper.CL
##  No liquid        -0.560 0.283 392   -1.117 -0.00255
##  oil-based        -0.720 0.189 392   -1.092 -0.34920
##  water-based      -0.388 0.221 392   -0.823  0.04726
## 
## PFAS_carbon_chain = 12:
##  Cooking_Category emmean    SE  df lower.CL upper.CL
##  No liquid        -0.480 0.300 392   -1.069  0.10876
##  oil-based        -0.641 0.212 392   -1.058 -0.22346
##  water-based      -0.308 0.241 392   -0.782  0.16548
## 
## Degrees-of-freedom method: user-specified 
## Confidence level used: 0.95

Subset analyses for each cooking category

Here, we investigated whether the effect of the continuous moderators on lnRR vary depending on the cooking category. Hence, we performed subset analyses for each cooking category.

Oil-based cooking

Subset data and update function

oil_dat<-filter(dat, Cooking_Category=="oil-based")

include <- row.names(cor_tree) %in% oil_dat$Phylogeny # Check which rows are present in the phylogenetic tree 
cor_tree_oil <- cor_tree[include, include] # Only include the species that match the reduced data set 


run_model_oil<-function(data,formula){
  data<-as.data.frame(data) # convert data set into a data frame to calculate VCV matrix 
  VCV<-make_VCV_matrix(data, V = "var_lnRR", cluster = "Cohort_ID", obs = "Effect_ID", rho = 0.5) # create VCV matrix for the specified data
  
  rma.mv(lnRR, VCV, # run the model, as described earlier
         mods=formula,
         random = list(~1|Study_ID,
                       ~1|Phylogeny, 
                       ~1|Species_common, 
                       ~1|PFAS_type, 
                       ~1|Effect_ID), 
         R= list(Phylogeny = cor_tree_oil), # cor_tree_oil here
         test = "t", 
         data = data)
}

Full model

full_model_oil <- run_model_oil(oil_dat, ~scale(Temperature_in_Celsius) + scale(Length_cooking_time_in_s) + 
    scale(PFAS_carbon_chain) + scale(log(Volume_liquid_ml)))

summary(full_model_oil)
## 
## Multivariate Meta-Analysis Model (k = 263; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -184.0059   368.0118   388.0118   423.5414   388.9025   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.0000  0.0001      6     no        Study_ID   no 
## sigma^2.2  0.0000  0.0000     19     no       Phylogeny  yes 
## sigma^2.3  0.0141  0.1188     19     no  Species_common   no 
## sigma^2.4  0.0433  0.2080     16     no       PFAS_type   no 
## sigma^2.5  0.1124  0.3353    263     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 258) = 573.2766, p-val < .0001
## 
## Test of Moderators (coefficients 2:5):
## F(df1 = 4, df2 = 258) = 27.1829, p-val < .0001
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## intrcpt                           -0.4370  0.0959  -4.5554  <.0001  -0.6259 
## scale(Temperature_in_Celsius)     -0.0039  0.0817  -0.0474  0.9622  -0.1647 
## scale(Length_cooking_time_in_s)   -0.3805  0.0485  -7.8388  <.0001  -0.4761 
## scale(PFAS_carbon_chain)           0.1286  0.0613   2.0957  0.0371   0.0078 
## scale(log(Volume_liquid_ml))      -0.4032  0.0893  -4.5131  <.0001  -0.5791 
##                                    ci.ub 
## intrcpt                          -0.2481  *** 
## scale(Temperature_in_Celsius)     0.1570      
## scale(Length_cooking_time_in_s)  -0.2849  *** 
## scale(PFAS_carbon_chain)          0.2494    * 
## scale(log(Volume_liquid_ml))     -0.2273  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
save(full_model_oil, file = here("Rdata", "full_model_oil.RData"))

Water-based cooking

Subset data and updating functions

water_dat<-filter(dat, Cooking_Category=="water-based")

include <- row.names(cor_tree) %in% water_dat$Phylogeny # Check which rows are present in the phylogenetic tree 
cor_tree_water <- cor_tree[include, include] # Only include the species that match the reduced data set 


run_model_water<-function(data,formula){
  data<-as.data.frame(data) # convert data set into a data frame to calculate VCV matrix 
  VCV<-make_VCV_matrix(data, V = "var_lnRR", cluster = "Cohort_ID", obs = "Effect_ID", rho = 0.5) # create VCV matrix for the specified data
  
  rma.mv(lnRR, VCV, # run the model, as described earlier
         mods=formula,
         random = list(~1|Study_ID,
                       ~1|Phylogeny, 
                       ~1|Species_common, 
                       ~1|PFAS_type, 
                       ~1|Effect_ID), 
         R= list(Phylogeny = cor_tree_water), # cor_tree_water here
         test = "t", 
         data = data)
}

Full model

full_model_water <- run_model_water(water_dat, ~scale(Temperature_in_Celsius) + scale(Length_cooking_time_in_s) + 
    scale(PFAS_carbon_chain) + scale(log(Volume_liquid_ml)))

summary(full_model_water)
## 
## Multivariate Meta-Analysis Model (k = 121; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -179.5070   359.0139   377.0139   401.8735   378.6961   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.3578  0.5982      6     no        Study_ID   no 
## sigma^2.2  0.0000  0.0002     19     no       Phylogeny  yes 
## sigma^2.3  0.0000  0.0039     19     no  Species_common   no 
## sigma^2.4  0.4043  0.6359     15     no       PFAS_type   no 
## sigma^2.5  0.9470  0.9732    121     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 117) = 2237.4353, p-val < .0001
## 
## Test of Moderators (coefficients 2:4):
## F(df1 = 3, df2 = 117) = 4.6171, p-val = 0.0043
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## intrcpt                           -0.9408  0.3396  -2.7700  0.0065  -1.6134 
## scale(Length_cooking_time_in_s)   -0.4503  0.1591  -2.8307  0.0055  -0.7653 
## scale(PFAS_carbon_chain)          -0.0082  0.1688  -0.0487  0.9612  -0.3426 
## scale(log(Volume_liquid_ml))      -0.7986  0.2779  -2.8732  0.0048  -1.3491 
##                                    ci.ub 
## intrcpt                          -0.2682  ** 
## scale(Length_cooking_time_in_s)  -0.1353  ** 
## scale(PFAS_carbon_chain)          0.3261     
## scale(log(Volume_liquid_ml))     -0.2481  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
save(full_model_water, file = here("Rdata", "full_model_water.RData"))

Dry cooking

Not very relevant because all effect sizes are from one study here. Also, the model does not converge when adding VCV_lnRR

dry_dat<-filter(dat, Cooking_Category=="No liquid")

include <- row.names(cor_tree) %in% dry_dat$Phylogeny # Check which rows are present in the phylogenetic tree 
cor_tree_dry <- cor_tree[include, include] # Only include the species that match the reduced data set 


run_model_dry<-function(data,formula){
  data<-as.data.frame(data) # convert data set into a data frame to calculate VCV matrix 
  rma.mv(lnRR, var_lnRR, # run the model with var_lnRR instead of lnCVR
         mods=formula,
         random = list(~1|Study_ID,
                       ~1|Phylogeny, 
                       ~1|Species_common, 
                       ~1|PFAS_type, 
                       ~1|Effect_ID), 
         R= list(Phylogeny = cor_tree_dry), # cor_tree_dry here
         test = "t", 
         data = data)
}

Full model

full_model_dry <- run_model_dry(dry_dat, ~scale(Length_cooking_time_in_s))  # Nodel does not converge with VCV_lnRR

summary(full_model_dry)
## 
## Multivariate Meta-Analysis Model (k = 47; method: REML)
## 
##   logLik  Deviance       AIC       BIC      AICc 
## -12.9722   25.9445   37.9445   48.7844   40.1550   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.0000  0.0000      1    yes        Study_ID   no 
## sigma^2.2  0.0046  0.0679      8     no       Phylogeny  yes 
## sigma^2.3  0.0022  0.0471      8     no  Species_common   no 
## sigma^2.4  0.0735  0.2711      2     no       PFAS_type   no 
## sigma^2.5  0.0000  0.0000     47     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 45) = 40.1184, p-val = 0.6785
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 45) = 38.2787, p-val < .0001
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## intrcpt                           -0.7770  0.2071  -3.7513  0.0005  -1.1942 
## scale(Length_cooking_time_in_s)   -0.3461  0.0559  -6.1870  <.0001  -0.4588 
##                                    ci.ub 
## intrcpt                          -0.3598  *** 
## scale(Length_cooking_time_in_s)  -0.2334  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
save(full_model_dry, file = here("Rdata", "full_model_dry.RData"))

Plots

Cooking time

Generate predictions

  oil_dat <- filter(dat, Cooking_Category=="oil-based")
  water_dat <- filter(dat, Cooking_Category=="water-based")
  dry_dat <- filter(dat, Cooking_Category=="No liquid")

  oil_dat_time<-filter(oil_dat, Length_cooking_time_in_s!="NA") 
  water_dat_time<-filter(water_dat, Length_cooking_time_in_s!="NA") 
  dry_dat_time<-filter(dry_dat, Length_cooking_time_in_s!="NA")
  
model_oil_time<-run_model_oil(oil_dat_time, ~Length_cooking_time_in_s) 
model_water_time<-run_model_water(water_dat_time, ~Length_cooking_time_in_s) 
model_dry_time<-run_model_dry(dry_dat_time, ~Length_cooking_time_in_s) 


pred_oil_time<-predict.rma(model_oil_time)
pred_water_time<-predict.rma(model_water_time)
pred_dry_time<-predict.rma(model_dry_time)

oil_dat_time<-mutate(oil_dat_time,
                    ci.lb = pred_oil_time$ci.lb, # lower bound of the confidence interval for oil
                    ci.ub = pred_oil_time$ci.ub, # upper bound of the confidence interval for oil
                    fit = pred_oil_time$pred) # regression line for oil

water_dat_time<-mutate(water_dat_time,
                    ci.lb = pred_water_time$ci.lb, # lower bound of the confidence interval for water
                    ci.ub = pred_water_time$ci.ub, # upper bound of the confidence interval for water
                    fit = pred_water_time$pred) # regression line for water

dry_dat_time<-mutate(dry_dat_time,
                    ci.lb = pred_dry_time$ci.lb, # lower bound of the confidence interval for dry
                    ci.ub = pred_dry_time$ci.ub, # upper bound of the confidence interval for dry
                    fit = pred_dry_time$pred) # regression line for dry

Plot

ggplot(dat,aes(x = Length_cooking_time_in_s, y = lnRR, fill=Cooking_Category)) +
  
       geom_ribbon(data=water_dat_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=water_dat_time,aes(y = fit), size = 1.5, col="dodgerblue")+  
  
       geom_ribbon(data=oil_dat_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) +
       geom_line(data=oil_dat_time,aes(y = fit), size = 1.5, col="goldenrod")+  
  
         geom_ribbon(data=dry_dat_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=dry_dat_time,aes(y = fit), size = 1.5, col="palegreen3")+  
  
  
       geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
       scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
       labs(x = "Cooking time (s)", y = "lnRR", size = "Precison (1/SE)") + 
  scale_size_continuous(range=c(1,10))+
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))

Predictions with the full model

##### Oil based
full_model_oil_time<- run_model_oil(oil_dat, ~ scale(Temperature_in_Celsius) +
                                           Length_cooking_time_in_s+
                                           scale(PFAS_carbon_chain) +
                                           scale(log(Volume_liquid_ml)))

pred_oil_time<-predict.rma(full_model_oil_time, addx=TRUE, newmods=cbind(0,c(120:1500), 0, 0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_oil_time<-as.data.frame(pred_oil_time)
pred_oil_time$Length_cooking_time_in_s=pred_oil_time$X.Length_cooking_time_in_s
pred_oil_time<-left_join(oil_dat, pred_oil_time, by="Length_cooking_time_in_s")


##### Water based
full_model_water_time<- run_model_water(water_dat, ~ scale(Temperature_in_Celsius) +
                                           Length_cooking_time_in_s+
                                           scale(PFAS_carbon_chain) +
                                           scale(log(Volume_liquid_ml)))

pred_water_time<-predict.rma(full_model_water_time, addx=TRUE, newmods=cbind(c(120:1500), 0, 0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_water_time<-as.data.frame(pred_water_time)
pred_water_time$Length_cooking_time_in_s=pred_water_time$X.Length_cooking_time_in_s
pred_water_time<-left_join(water_dat, pred_water_time, by="Length_cooking_time_in_s")

##### No liquid 

full_model_dry_time<- run_model_dry(dry_dat, ~ Length_cooking_time_in_s)

pred_dry_time<-predict.rma(full_model_dry_time, addx=TRUE) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_dry_time<-as.data.frame(pred_dry_time)
pred_dry_time$Length_cooking_time_in_s=pred_dry_time$X.Length_cooking_time_in_s
pred_dry_time<-left_join(dry_dat, pred_dry_time, by="Length_cooking_time_in_s")




ggplot(dat,aes(x = Length_cooking_time_in_s, y = lnRR, fill=Cooking_Category)) +
  
       geom_ribbon(data=pred_water_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_water_time,aes(y = pred), size = 1.5, col="dodgerblue")+  
  
       geom_ribbon(data=pred_oil_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_oil_time,aes(y = pred), size = 1.5, col="goldenrod")+  
  
        geom_ribbon(data=pred_dry_time, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_dry_time,aes(y = pred), size = 1.5, col="palegreen3")+  
  
  
       geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
       scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
       labs(x = "Cooking time (s)", y = "lnRR", size = "Precison (1/SE)") + 
  scale_size_continuous(range=c(1,10))+
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))

Volume of liquid

Generate predictions

oil_dat_vol <- filter(oil_dat, Volume_liquid_ml != "NA")
water_dat_vol <- filter(water_dat, Volume_liquid_ml != "NA")

model_oil_vol <- run_model_oil(oil_dat_vol, ~log(Volume_liquid_ml))
model_water_vol <- run_model_water(water_dat_vol, ~log(Volume_liquid_ml))


pred_oil_vol <- predict.rma(model_oil_vol)
pred_water_vol <- predict.rma(model_water_vol)

oil_dat_vol <- mutate(oil_dat_vol, ci.lb = pred_oil_vol$ci.lb, ci.ub = pred_oil_vol$ci.ub, 
    fit = pred_oil_vol$pred)

water_dat_vol <- mutate(water_dat_vol, ci.lb = pred_water_vol$ci.lb, ci.ub = pred_water_vol$ci.ub, 
    fit = pred_water_vol$pred)

Plot

ggplot(dat, aes(x = log(Volume_liquid_ml), y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = water_dat_vol, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = water_dat_vol, aes(y = fit), size = 1.5, col = "dodgerblue") + 
    
geom_ribbon(data = oil_dat_vol, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) + 
    geom_line(data = oil_dat_vol, aes(y = fit), size = 1.5, col = "goldenrod") + 
    
geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "ln(Volume of liquid (mL))", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))

Predictions with the full model

##### Oil based
full_model_oil_vol <- run_model_oil(oil_dat, ~scale(Temperature_in_Celsius) + scale(Length_cooking_time_in_s) + 
    scale(PFAS_carbon_chain) + log_Volume_liquid_ml)
pred_oil_vol <- predict.rma(full_model_oil_vol, addx = TRUE, newmods = cbind(0, 0, 
    0, c(log(5):log(750))))  # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time

pred_oil_vol <- as.data.frame(pred_oil_vol)
pred_oil_vol <- pred_oil_vol %>% mutate(Volume_liquid_ml = exp(X.log_Volume_liquid_ml), 
    Cooking_Category = "oil-based", lnRR = 0)  # for the plot to work, we need to add a column with cooking category and a column with lnRR


##### Water based

full_model_water_vol <- run_model_water(water_dat, ~scale(Temperature_in_Celsius) + 
    scale(Length_cooking_time_in_s) + scale(PFAS_carbon_chain) + log_Volume_liquid_ml)

pred_water_vol <- predict.rma(full_model_water_vol, addx = TRUE, newmods = cbind(0, 
    0, c(log(250):log(59777))))  # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time

pred_water_vol <- as.data.frame(pred_water_vol)
pred_water_vol <- pred_water_vol %>% mutate(Volume_liquid_ml = exp(X.log_Volume_liquid_ml), 
    Cooking_Category = "water-based", lnRR = 0)



ggplot(dat, aes(x = log(Volume_liquid_ml), y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = pred_water_vol, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = pred_water_vol, aes(y = pred), size = 1.5, col = "dodgerblue") + 
    
geom_ribbon(data = pred_oil_vol, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) + 
    geom_line(data = pred_oil_vol, aes(y = pred), size = 1.5, col = "goldenrod") + 
    

geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "ln(Volume of liquid (mL))", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))  #### The line doesn't go all the way down for water-based because the highest values are not included in the full model

PFAS carbon chain length

Generate predictions

oil_dat_PFAS <- filter(oil_dat, PFAS_carbon_chain != "NA")
water_dat_PFAS <- filter(water_dat, PFAS_carbon_chain != "NA")
dry_dat_PFAS <- filter(dry_dat, PFAS_carbon_chain != "NA")

model_oil_PFAS <- run_model_oil(oil_dat_PFAS, ~PFAS_carbon_chain)
model_water_PFAS <- run_model_water(water_dat_PFAS, ~PFAS_carbon_chain)
model_dry_PFAS <- run_model_dry(dry_dat_PFAS, ~PFAS_carbon_chain)


pred_oil_PFAS <- predict.rma(model_oil_PFAS)
pred_water_PFAS <- predict.rma(model_water_PFAS)
pred_dry_PFAS <- predict.rma(model_dry_PFAS)

oil_dat_PFAS <- mutate(oil_dat_PFAS, ci.lb = pred_oil_PFAS$ci.lb, ci.ub = pred_oil_PFAS$ci.ub, 
    fit = pred_oil_PFAS$pred)

water_dat_PFAS <- mutate(water_dat_PFAS, ci.lb = pred_water_PFAS$ci.lb, ci.ub = pred_water_PFAS$ci.ub, 
    fit = pred_water_PFAS$pred)

dry_dat_PFAS <- mutate(dry_dat_PFAS, ci.lb = pred_dry_PFAS$ci.lb, ci.ub = pred_dry_PFAS$ci.ub, 
    fit = pred_dry_PFAS$pred)

Plot

ggplot(dat, aes(x = PFAS_carbon_chain, y = lnRR, fill = Cooking_Category)) + 
geom_ribbon(data = dry_dat_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) + 
    geom_line(data = dry_dat_PFAS, aes(y = fit), size = 1.5, col = "palegreen3") + 
    
geom_ribbon(data = water_dat_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = water_dat_PFAS, aes(y = fit), size = 1.5, col = "dodgerblue") + 
    
geom_ribbon(data = oil_dat_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) + 
    geom_line(data = oil_dat_PFAS, aes(y = fit), size = 1.5, col = "goldenrod") + 
    


geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "PFAS carbon chain length", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))

Predictions with the full model

##### Oil based
full_model_oil_PFAS<- run_model_oil(oil_dat, ~ scale(Temperature_in_Celsius) +
                                           scale(Length_cooking_time_in_s)+
                                           PFAS_carbon_chain +
                                           scale(log(Volume_liquid_ml)))
pred_oil_PFAS<-predict.rma(full_model_oil_PFAS, addx=TRUE, newmods=cbind(0,0, c(3:14),0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of PFAS carbon chain
pred_oil_PFAS<-as.data.frame(pred_oil_PFAS)
pred_oil_PFAS$PFAS_carbon_chain=pred_oil_PFAS$X.PFAS_carbon_chain
pred_oil_PFAS<-left_join(oil_dat, pred_oil_PFAS, by="PFAS_carbon_chain")


##### Water based
full_model_water_PFAS<- run_model_water(water_dat, ~ scale(Temperature_in_Celsius) +
                                           scale(Length_cooking_time_in_s)+
                                           PFAS_carbon_chain +
                                           scale(log(Volume_liquid_ml)))

pred_water_PFAS<-predict.rma(full_model_water_PFAS, addx=TRUE, newmods=cbind(0, c(3:14),0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_water_PFAS<-as.data.frame(pred_water_PFAS)
pred_water_PFAS$PFAS_carbon_chain=pred_water_PFAS$X.PFAS_carbon_chain
pred_water_PFAS<-left_join(water_dat, pred_water_PFAS, by="PFAS_carbon_chain")

##### No liquid 

full_model_dry_PFAS<- run_model_dry(dry_dat, ~ PFAS_carbon_chain)

pred_dry_PFAS<-predict.rma(full_model_dry_PFAS, addx=TRUE)
pred_dry_PFAS<-as.data.frame(pred_dry_PFAS)
pred_dry_PFAS$PFAS_carbon_chain=pred_dry_PFAS$X.PFAS_carbon_chain
pred_dry_PFAS<-left_join(dry_dat, pred_dry_PFAS, by="PFAS_carbon_chain")




ggplot(dat,aes(x = PFAS_carbon_chain, y = lnRR, fill=Cooking_Category)) +
  
    
       geom_ribbon(data=pred_dry_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_dry_PFAS,aes(y = pred), size = 1.5, col="palegreen3")+  
  
  
       geom_ribbon(data=pred_water_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_water_PFAS,aes(y = pred), size = 1.5, col="dodgerblue")+  
  
  
       geom_ribbon(data=pred_oil_PFAS, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) +
       geom_line(data=pred_oil_PFAS,aes(y = pred), size = 1.5, col="goldenrod")+  
  
  
       geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
       scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
       labs(x = "PFAS carbon chain length", y = "lnRR", size = "Precison (1/SE)") + 
  scale_size_continuous(range=c(1,10))+
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))

Publication bias

Funnel plot

funnel(full_model, yaxis = "seinv")

Egger regressions

egger_all <- run_model(dat, ~-1 + Cooking_Category + I(sqrt(1/N_tilde)) + scale(Publication_year) + 
    scale(Temperature_in_Celsius) + scale(Length_cooking_time_in_s) + scale(PFAS_carbon_chain) + 
    scale(log(Volume_liquid_ml)))
summary(egger_all)
## 
## Multivariate Meta-Analysis Model (k = 399; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -437.7512   875.5024   903.5024   959.0284   904.6224   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.1374  0.3707      7     no        Study_ID   no 
## sigma^2.2  0.0000  0.0001     26     no       Phylogeny  yes 
## sigma^2.3  0.0082  0.0907     26     no  Species_common   no 
## sigma^2.4  0.1188  0.3447     17     no       PFAS_type   no 
## sigma^2.5  0.3978  0.6307    399     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 390) = 2866.3281, p-val < .0001
## 
## Test of Moderators (coefficients 1:9):
## F(df1 = 9, df2 = 390) = 9.9236, p-val < .0001
## 
## Model Results:
## 
##                                  estimate      se     tval    pval    ci.lb 
## Cooking_CategoryNo liquid         -0.2088  0.3994  -0.5227  0.6015  -0.9940 
## Cooking_Categoryoil-based         -0.3346  0.3457  -0.9679  0.3337  -1.0141 
## Cooking_Categorywater-based        0.0285  0.3478   0.0821  0.9346  -0.6553 
## I(sqrt(1/N_tilde))                -0.7252  0.5313  -1.3649  0.1731  -1.7699 
## scale(Publication_year)            0.1942  0.1222   1.5890  0.1129  -0.0461 
## scale(Temperature_in_Celsius)      0.0243  0.1107   0.2195  0.8263  -0.1934 
## scale(Length_cooking_time_in_s)   -0.3939  0.0583  -6.7509  <.0001  -0.5086 
## scale(PFAS_carbon_chain)           0.0708  0.0800   0.8849  0.3767  -0.0865 
## scale(log(Volume_liquid_ml))      -0.6800  0.1037  -6.5596  <.0001  -0.8839 
##                                    ci.ub 
## Cooking_CategoryNo liquid         0.5765      
## Cooking_Categoryoil-based         0.3450      
## Cooking_Categorywater-based       0.7124      
## I(sqrt(1/N_tilde))                0.3194      
## scale(Publication_year)           0.4346      
## scale(Temperature_in_Celsius)     0.2420      
## scale(Length_cooking_time_in_s)  -0.2792  *** 
## scale(PFAS_carbon_chain)          0.2282      
## scale(log(Volume_liquid_ml))     -0.4762  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# funnel(egger_all, yaxis = 'seinv') little evidence
egger_n <- run_model(dat, ~I(sqrt(1/N_tilde)))
summary(egger_n)
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -632.8391  1265.6782  1279.6782  1309.3191  1279.9013   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.6010  0.7752     10     no        Study_ID   no 
## sigma^2.2  0.0044  0.0664     38     no       Phylogeny  yes 
## sigma^2.3  0.1987  0.4458     39     no  Species_common   no 
## sigma^2.4  0.1008  0.3175     18     no       PFAS_type   no 
## sigma^2.5  0.4887  0.6991    512     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 510) = 6790.0424, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 510) = 0.6334, p-val = 0.4265
## 
## Model Results:
## 
##                     estimate      se     tval    pval    ci.lb   ci.ub 
## intrcpt              -0.0930  0.4055  -0.2294  0.8186  -0.8896  0.7036    
## I(sqrt(1/N_tilde))   -0.5005  0.6289  -0.7959  0.4265  -1.7361  0.7350    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
save(egger_all, egger_n, file = here("Rdata", "egger_regressions.RData"))

Publication year

pub_year <- run_model(dat, ~Publication_year)
summary(pub_year)
## 
## Multivariate Meta-Analysis Model (k = 512; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -631.7358  1263.4716  1277.4716  1307.1125  1277.6947   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.5567  0.7461     10     no        Study_ID   no 
## sigma^2.2  0.0145  0.1206     38     no       Phylogeny  yes 
## sigma^2.3  0.2046  0.4524     39     no  Species_common   no 
## sigma^2.4  0.1014  0.3184     18     no       PFAS_type   no 
## sigma^2.5  0.4878  0.6984    512     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 510) = 7278.1828, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 510) = 1.2853, p-val = 0.2574
## 
## Model Results:
## 
##                    estimate        se     tval    pval      ci.lb     ci.ub 
## intrcpt           -165.8555  146.0186  -1.1359  0.2566  -452.7275  121.0165    
## Publication_year     0.0821    0.0724   1.1337  0.2574    -0.0602    0.2243    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_continuous(dat, pub_year, dat$Publication_year, "Publication year")

##

Sensitivity analyses

Leave-one-out analyses

Here, we iteratively removed one study at the time and investigated how it affects the overall mean. Removing one of the study particularly modifies the estimate, but none of these models show a significant overall difference in PFAS concentration with cooking.

dat$Study_ID<-as.factor(dat$Study_ID)
dat<-as.data.frame(dat) # Only work with a dataframe
VCV_matrix<-list() # will need new VCV matrices because the sample size will be iteratively reduced
Leave1studyout<-list() # create a list that will host the results of each model 
for(i in 1:length(levels(dat$Study_ID))){ # N models = N studies 
  VCV_matrix[[i]]<-make_VCV_matrix(dat[dat$Study_ID != levels(dat$Study_ID)[i], ], V="var_lnRR", cluster="Cohort_ID", obs="Effect_ID") # Create a new VCV matrix for each new model
  Leave1studyout[[i]] <- rma.mv(yi = lnRR, V = VCV_matrix[[i]], # Same model structure as all the models we fitted
                                random = list(~1|Study_ID,
                                              ~1|Phylogeny, 
                                              ~1|Species_common, 
                                              ~1|PFAS_type, 
                                              ~1|Effect_ID),
                                R= list(Phylogeny = cor_tree), 
                                test = "t", 
                                data = dat[dat$Study_ID != levels(dat$Study_ID)[i], ]) # Generate a new model for each new data (iterative removal of one study at a time)
}

# The output is a list so we need to summarise the coefficients of all the models performed

results.Leave1studyout<-as.data.frame(cbind(
                                           sapply(Leave1studyout, function(x) summary(x)$beta), # extract the beta coefficient from all models
                                           sapply(Leave1studyout, function(x) summary(x)$se), # extract the standard error from all models
                                           sapply(Leave1studyout, function(x) summary(x)$zval),  # extract the z value from all models
                                           sapply(Leave1studyout, function(x) summary(x)$pval), # extract the p value from all models
                                           sapply(Leave1studyout, function(x) summary(x)$ci.lb), # extract the lower confidence interval for all models
                                           sapply(Leave1studyout, function(x) summary(x)$ci.ub))) # extract the upper confidence interval for all models

colnames(results.Leave1studyout)=c("Estimate", "SE", "zval", "pval", "ci.lb", "ci.ub") # change column names 
kable(results.Leave1studyout)%>% kable_styling("striped", position="left") %>% scroll_box(width="100%", height="500px") # Table of the results from all models
Estimate SE zval pval ci.lb ci.ub
-0.3253221 0.3107507 -1.0468911 0.2956467 -0.9358339 0.2851897
-0.4037084 0.3101145 -1.3018043 0.1935803 -1.0129906 0.2055738
-0.3997524 0.3468279 -1.1525957 0.2497971 -1.0816832 0.2821784
0.0435382 0.2731984 0.1593648 0.8734478 -0.4932604 0.5803368
-0.3312637 0.3129344 -1.0585724 0.2903281 -0.9461575 0.2836301
-0.2423434 0.3010346 -0.8050351 0.4211923 -0.8338304 0.3491436
-0.3309747 0.3124412 -1.0593181 0.2899684 -0.9448401 0.2828908
-0.2229376 0.3086359 -0.7223322 0.4706194 -0.8301566 0.3842813
-0.3882687 0.3207704 -1.2104253 0.2267090 -1.0185498 0.2420125
-0.4843182 0.2868896 -1.6881692 0.0920640 -1.0481112 0.0794748
dat %>% group_by(Author_year, Study_ID) %>% summarise(mean=mean(lnRR)) # Study F005 (DelGobbo_2008) has much lower effect sizes than the others. 
## # A tibble: 10 x 3
## # Groups:   Author_year [10]
##    Author_year      Study_ID    mean
##    <chr>            <fct>      <dbl>
##  1 Alves_2017       F001     -0.0774
##  2 Barbosa_2018     F002      0.198 
##  3 Bhavsar_2014     F003      0.153 
##  4 DelGobbo_2008    F005     -2.00  
##  5 Hu_2020          F006     -0.134 
##  6 Kim_2020         F007     -0.887 
##  7 Luo_2019         F008     -0.161 
##  8 Sungur_2019      F010     -0.893 
##  9 Taylor_2019      F011      0.213 
## 10 Vassiliadou_2015 F013      0.673

Subset analysis without Study_ID F005 (Del Gobbo et al. 2008)

Cooking time

dat.sens <- filter(dat, Author_year != "DelGobbo_2008")

include <- row.names(cor_tree) %in% dat.sens$Phylogeny  # Check which rows are present in the phylogenetic tree 
cor_tree_sens <- cor_tree[include, include]  # Only include the species that match the reduced data set 

dat.sens <- as.data.frame(dat.sens)  # convert data set into a data frame to calculate VCV matrix 
VCV_lnRR.sens <- make_VCV_matrix(dat.sens, V = "var_lnRR", cluster = "Cohort_ID", 
    obs = "Effect_ID", rho = 0.5)  # create VCV matrix for the specified data

mod.sens <- rma.mv(lnRR, VCV_lnRR.sens, mods = ~Length_cooking_time_in_s, random = list(~1 | 
    Study_ID, ~1 | Phylogeny, ~1 | Species_common, ~1 | PFAS_type, ~1 | Effect_ID), 
    R = list(Phylogeny = cor_tree_sens), test = "t", data = dat.sens)
summary(mod.sens)
## 
## Multivariate Meta-Analysis Model (k = 430; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -276.6805   553.3611   567.3611   595.7749   567.6277   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.2023  0.4498      8     no        Study_ID   no 
## sigma^2.2  0.0311  0.1763     22     no       Phylogeny  yes 
## sigma^2.3  0.0112  0.1059     22     no  Species_common   no 
## sigma^2.4  0.0964  0.3105     17     no       PFAS_type   no 
## sigma^2.5  0.0683  0.2613    430     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 428) = 1249.4809, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 428) = 83.0392, p-val < .0001
## 
## Model Results:
## 
##                           estimate      se     tval    pval    ci.lb    ci.ub 
## intrcpt                     0.6929  0.2349   2.9499  0.0034   0.2312   1.1546 
## Length_cooking_time_in_s   -0.0012  0.0001  -9.1126  <.0001  -0.0015  -0.0009 
##  
## intrcpt                    ** 
## Length_cooking_time_in_s  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dat.time.sens <- filter(dat.sens, Length_cooking_time_in_s != "NA")
plot_continuous(dat.time.sens, mod.sens, dat.time.sens$Length_cooking_time_in_s, 
    "Cooking time (s)")  # The relationship with cooking time appears even stronger

Effect of cooking time on lnRR for each cooking category
oil_dat.sens <- filter(dat.sens, Cooking_Category == "oil-based")
water_dat.sens <- filter(dat.sens, Cooking_Category == "water-based")
dry_dat.sens <- filter(dat.sens, Cooking_Category == "No liquid")


oil_dat_time.sens <- filter(oil_dat.sens, Length_cooking_time_in_s != "NA")
water_dat_time.sens <- filter(water_dat.sens, Length_cooking_time_in_s != "NA")
dry_dat_time.sens <- filter(dry_dat.sens, Length_cooking_time_in_s != "NA")

model_oil_time.sens <- run_model_oil(oil_dat_time.sens, ~Length_cooking_time_in_s)
model_water_time.sens <- run_model_water(water_dat_time.sens, ~Length_cooking_time_in_s)
model_dry_time.sens <- run_model_dry(dry_dat_time.sens, ~Length_cooking_time_in_s)

pred_oil_time.sens <- predict.rma(model_oil_time.sens)
pred_water_time.sens <- predict.rma(model_water_time.sens)
pred_dry_time.sens <- predict.rma(model_dry_time.sens)

oil_dat_time.sens <- mutate(oil_dat_time.sens, ci.lb = pred_oil_time.sens$ci.lb, 
    ci.ub = pred_oil_time.sens$ci.ub, fit = pred_oil_time.sens$pred)

water_dat_time.sens <- mutate(water_dat_time.sens, ci.lb = pred_water_time.sens$ci.lb, 
    ci.ub = pred_water_time.sens$ci.ub, fit = pred_water_time.sens$pred)

dry_dat_time.sens <- mutate(dry_dat_time.sens, ci.lb = pred_dry_time.sens$ci.lb, 
    ci.ub = pred_dry_time.sens$ci.ub, fit = pred_dry_time.sens$pred)

# Actual plot

ggplot(dat.sens, aes(x = Length_cooking_time_in_s, y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = water_dat_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = water_dat_time.sens, aes(y = fit), size = 1.5, 
    col = "dodgerblue") + 
geom_ribbon(data = oil_dat_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.3) + geom_line(data = oil_dat_time.sens, aes(y = fit), size = 1.5, 
    col = "goldenrod") + 
geom_ribbon(data = dry_dat_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.25) + geom_line(data = dry_dat_time.sens, aes(y = fit), size = 1.5, 
    col = "palegreen3") + 

geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "Cooking time (s)", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))

###### Predictions with the full model

##### Oil based
full_model_oil_time.sens<- run_model_oil(oil_dat.sens, ~ scale(Temperature_in_Celsius) +
                                           Length_cooking_time_in_s+
                                           scale(PFAS_carbon_chain) +
                                           scale(log(Volume_liquid_ml)))

pred_oil_time.sens<-predict.rma(full_model_oil_time.sens, addx=TRUE, newmods=cbind(0,c(120:1500), 0, 0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_oil_time.sens<-as.data.frame(pred_oil_time.sens)
pred_oil_time.sens$Length_cooking_time_in_s=pred_oil_time.sens$X.Length_cooking_time_in_s
pred_oil_time.sens<-left_join(oil_dat.sens, pred_oil_time.sens, by="Length_cooking_time_in_s")


##### Water based
full_model_water_time.sens<- run_model_water(water_dat.sens, ~ scale(Temperature_in_Celsius) +
                                           Length_cooking_time_in_s+
                                           scale(PFAS_carbon_chain) +
                                           scale(log(Volume_liquid_ml)))

pred_water_time.sens<-predict.rma(full_model_water_time.sens, addx=TRUE, newmods=cbind(c(120:1500), 0, 0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_water_time.sens<-as.data.frame(pred_water_time.sens)
pred_water_time.sens$Length_cooking_time_in_s=pred_water_time.sens$X.Length_cooking_time_in_s
pred_water_time.sens<-left_join(water_dat, pred_water_time.sens, by="Length_cooking_time_in_s")

##### No liquid 

full_model_dry_time.sens<- run_model_dry(dry_dat.sens, ~ Length_cooking_time_in_s)

pred_dry_time.sens<-predict.rma(full_model_dry_time.sens, addx=TRUE) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_dry_time.sens<-as.data.frame(pred_dry_time.sens)
pred_dry_time.sens$Length_cooking_time_in_s=pred_dry_time.sens$X.Length_cooking_time_in_s
pred_dry_time.sens<-left_join(dry_dat.sens, pred_dry_time.sens, by="Length_cooking_time_in_s")




ggplot(dat.sens,aes(x = Length_cooking_time_in_s, y = lnRR, fill=Cooking_Category)) +
  
       geom_ribbon(data=pred_water_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_water_time.sens,aes(y = pred), size = 1.5, col="dodgerblue")+  
  
       geom_ribbon(data=pred_oil_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) +
       geom_line(data=pred_oil_time.sens,aes(y = pred), size = 1.5, col="goldenrod")+  
  
        geom_ribbon(data=pred_dry_time.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_dry_time.sens,aes(y = pred), size = 1.5, col="palegreen3")+  
  
  
       geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
       scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
       labs(x = "Cooking time (s)", y = "lnRR", size = "Precison (1/SE)") + 
  scale_size_continuous(range=c(1,10))+
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))

Volume of liquid

dat.sens.vol <- filter(dat.sens, Volume_liquid_ml != "NA")
include <- row.names(cor_tree) %in% dat.sens.vol$Phylogeny  # Check which rows are present in the phylogenetic tree 
cor_tree_sens.vol <- cor_tree[include, include]  # Only include the species that match the reduced data set 
VCV_lnRR.sens.vol <- make_VCV_matrix(dat.sens.vol, V = "var_lnRR", cluster = "Cohort_ID", 
    obs = "Effect_ID", rho = 0.5)  # create VCV matrix for the specified data


mod.sens.vol <- rma.mv(lnRR, VCV_lnRR.sens.vol, mods = ~log(Volume_liquid_ml), random = list(~1 | 
    Study_ID, ~1 | Phylogeny, ~1 | Species_common, ~1 | PFAS_type, ~1 | Effect_ID), 
    R = list(Phylogeny = cor_tree_sens.vol), test = "t", data = dat.sens.vol)
summary(mod.sens.vol)
## 
## Multivariate Meta-Analysis Model (k = 413; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -389.4527   778.9053   792.9053   821.0355   793.1832   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.3293  0.5739      7     no        Study_ID   no 
## sigma^2.2  0.0413  0.2031     26     no       Phylogeny  yes 
## sigma^2.3  0.1058  0.3252     27     no  Species_common   no 
## sigma^2.4  0.1297  0.3601     18     no       PFAS_type   no 
## sigma^2.5  0.2120  0.4604    413     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 411) = 3390.2773, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 411) = 0.4459, p-val = 0.5046
## 
## Model Results:
## 
##                        estimate      se     tval    pval    ci.lb   ci.ub 
## intrcpt                 -0.3392  0.4516  -0.7511  0.4530  -1.2268  0.5485    
## log(Volume_liquid_ml)    0.0462  0.0691   0.6678  0.5046  -0.0897  0.1821    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_continuous(dat.sens.vol, mod.sens.vol, log(dat.sens.vol$Volume_liquid_ml), "ln(Volume of liquid (mL))")  # The relationship with cooking time appears even stronger

Effect of cooking time on lnRR for each cooking category

The effect of volume of liquid is entirely driven by one study!!

oil_dat.sens <- filter(dat.sens, Cooking_Category == "oil-based")
water_dat.sens <- filter(dat.sens, Cooking_Category == "water-based")
dry_dat.sens <- filter(dat.sens, Cooking_Category == "No liquid")


oil_dat_vol.sens <- filter(oil_dat.sens, Volume_liquid_ml != "NA")
water_dat_vol.sens <- filter(water_dat.sens, Volume_liquid_ml != "NA")
dry_dat_vol.sens <- filter(dry_dat.sens, Volume_liquid_ml != "NA")

model_oil_vol.sens <- run_model_oil(oil_dat_vol.sens, ~log(Volume_liquid_ml))
model_water_vol.sens <- run_model_water(water_dat_vol.sens, ~log(Volume_liquid_ml))
model_dry_vol.sens <- run_model_dry(dry_dat_vol.sens, ~log(Volume_liquid_ml))

pred_oil_vol.sens <- predict.rma(model_oil_vol.sens)
pred_water_vol.sens <- predict.rma(model_water_vol.sens)
pred_dry_vol.sens <- predict.rma(model_dry_vol.sens)

oil_dat_vol.sens <- mutate(oil_dat_vol.sens, ci.lb = pred_oil_vol.sens$ci.lb, ci.ub = pred_oil_vol.sens$ci.ub, 
    fit = pred_oil_vol.sens$pred)

water_dat_vol.sens <- mutate(water_dat_vol.sens, ci.lb = pred_water_vol.sens$ci.lb, 
    ci.ub = pred_water_vol.sens$ci.ub, fit = pred_water_vol.sens$pred)

dry_dat_vol.sens <- mutate(dry_dat_vol.sens, ci.lb = pred_dry_vol.sens$ci.lb, ci.ub = pred_dry_vol.sens$ci.ub, 
    fit = pred_dry_vol.sens$pred)

# Actual plot

ggplot(dat.sens, aes(x = log(Volume_liquid_ml), y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = water_dat_vol.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = water_dat_vol.sens, aes(y = fit), size = 1.5, 
    col = "dodgerblue") + 
geom_ribbon(data = oil_dat_vol.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.3) + geom_line(data = oil_dat_vol.sens, aes(y = fit), size = 1.5, col = "goldenrod") + 
    
geom_ribbon(data = dry_dat_vol.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = dry_dat_vol.sens, aes(y = fit), size = 1.5, col = "palegreen3") + 
    

geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "ln(Volume of liquid (mL))", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))

Predictions with the full model
##### Oil based
full_model_oil_vol.sens <- run_model_oil(oil_dat.sens, ~scale(Temperature_in_Celsius) + 
    scale(Length_cooking_time_in_s) + scale(PFAS_carbon_chain) + log_Volume_liquid_ml)
pred_oil_vol.sens <- predict.rma(full_model_oil_vol.sens, addx = TRUE, newmods = cbind(0, 
    0, 0, c(log(5):log(750))))  # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time

pred_oil_vol.sens <- as.data.frame(pred_oil_vol.sens)
pred_oil_vol.sens <- pred_oil_vol.sens %>% mutate(Volume_liquid_ml = exp(X.log_Volume_liquid_ml), 
    Cooking_Category = "oil-based", lnRR = 0)  # for the plot to work, we need to add a column with cooking category and a column with lnRR


##### Water based

full_model_water_vol.sens <- run_model_water(water_dat.sens, ~scale(Temperature_in_Celsius) + 
    scale(Length_cooking_time_in_s) + scale(PFAS_carbon_chain) + log_Volume_liquid_ml)

pred_water_vol.sens <- predict.rma(full_model_water_vol.sens, addx = TRUE, newmods = cbind(0, 
    0, c(5.521461:7.824046)))  # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time

pred_water_vol.sens <- as.data.frame(pred_water_vol.sens)
pred_water_vol.sens <- pred_water_vol.sens %>% mutate(Volume_liquid_ml = exp(X.log_Volume_liquid_ml), 
    Cooking_Category = "water-based", lnRR = 0)



ggplot(dat.sens, aes(x = log(Volume_liquid_ml), y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = pred_water_vol.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = pred_water_vol.sens, aes(y = pred), size = 1.5, 
    col = "dodgerblue") + 
geom_ribbon(data = pred_oil_vol.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.3) + geom_line(data = pred_oil_vol.sens, aes(y = pred), size = 1.5, 
    col = "goldenrod") + 

geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "ln(Volume of liquid (mL))", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))  #### The line doesn't go all the way down (the predict function doesn't capture the biggest values)

PFAS carbon chain

dat.sens.PFAS <- filter(dat.sens, PFAS_carbon_chain != "NA")
include <- row.names(cor_tree) %in% dat.sens.PFAS$Phylogeny  # Check which rows are present in the phylogenetic tree 
cor_tree_sens.PFAS <- cor_tree[include, include]  # Only include the species that match the reduced data set 
VCV_lnRR.sens.PFAS <- make_VCV_matrix(dat.sens.PFAS, V = "var_lnRR", cluster = "Cohort_ID", 
    obs = "Effect_ID", rho = 0.5)  # create VCV matrix for the specified data


mod.sens.PFAS <- rma.mv(lnRR, VCV_lnRR.sens.PFAS, mods = ~PFAS_carbon_chain, random = list(~1 | 
    Study_ID, ~1 | Phylogeny, ~1 | Species_common, ~1 | PFAS_type, ~1 | Effect_ID), 
    R = list(Phylogeny = cor_tree_sens.PFAS), test = "t", data = dat.sens.PFAS)
summary(mod.sens.PFAS)
## 
## Multivariate Meta-Analysis Model (k = 486; method: REML)
## 
##    logLik   Deviance        AIC        BIC       AICc 
## -464.4535   928.9070   942.9070   972.1816   943.1423   
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed          factor    R 
## sigma^2.1  0.2361  0.4859      9     no        Study_ID   no 
## sigma^2.2  0.0998  0.3159     30     no       Phylogeny  yes 
## sigma^2.3  0.1036  0.3218     31     no  Species_common   no 
## sigma^2.4  0.0968  0.3111     18     no       PFAS_type   no 
## sigma^2.5  0.2299  0.4795    486     no       Effect_ID   no 
## 
## Test for Residual Heterogeneity:
## QE(df = 484) = 4439.7079, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## F(df1 = 1, df2 = 484) = 1.1682, p-val = 0.2803
## 
## Model Results:
## 
##                    estimate      se     tval    pval    ci.lb   ci.ub 
## intrcpt             -0.2244  0.3706  -0.6054  0.5452  -0.9525  0.5038    
## PFAS_carbon_chain    0.0301  0.0278   1.0809  0.2803  -0.0246  0.0847    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_continuous(dat.sens.PFAS, mod.sens.PFAS, dat.sens.PFAS$PFAS_carbon_chain, "PFAS carbon chain length")  # The relationship with cooking time appears even stronger

Effect of carbon chain length on lnRR for each cooking category
oil_dat.sens <- filter(dat.sens, Cooking_Category == "oil-based")
water_dat.sens <- filter(dat.sens, Cooking_Category == "water-based")
dry_dat.sens <- filter(dat.sens, Cooking_Category == "No liquid")


oil_dat_PFAS.sens <- filter(oil_dat.sens, PFAS_carbon_chain != "NA")
water_dat_PFAS.sens <- filter(water_dat.sens, PFAS_carbon_chain != "NA")
dry_dat_PFAS.sens <- filter(dry_dat.sens, PFAS_carbon_chain != "NA")

model_oil_PFAS.sens <- run_model_oil(oil_dat_PFAS.sens, ~PFAS_carbon_chain)
model_water_PFAS.sens <- run_model_water(water_dat_PFAS.sens, ~PFAS_carbon_chain)
model_dry_PFAS.sens <- run_model_dry(dry_dat_PFAS.sens, ~PFAS_carbon_chain)

pred_oil_PFAS.sens <- predict.rma(model_oil_PFAS.sens)
pred_water_PFAS.sens <- predict.rma(model_water_PFAS.sens)
pred_dry_PFAS.sens <- predict.rma(model_dry_PFAS.sens)

oil_dat_PFAS.sens <- mutate(oil_dat_PFAS.sens, ci.lb = pred_oil_PFAS.sens$ci.lb, 
    ci.ub = pred_oil_PFAS.sens$ci.ub, fit = pred_oil_PFAS.sens$pred)

water_dat_PFAS.sens <- mutate(water_dat_PFAS.sens, ci.lb = pred_water_PFAS.sens$ci.lb, 
    ci.ub = pred_water_PFAS.sens$ci.ub, fit = pred_water_PFAS.sens$pred)

dry_dat_PFAS.sens <- mutate(dry_dat_PFAS.sens, ci.lb = pred_dry_PFAS.sens$ci.lb, 
    ci.ub = pred_dry_PFAS.sens$ci.ub, fit = pred_dry_PFAS.sens$pred)

# Actual plot

ggplot(dat.sens, aes(x = PFAS_carbon_chain, y = lnRR, fill = Cooking_Category)) + 
    
geom_ribbon(data = dry_dat_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.2) + geom_line(data = dry_dat_PFAS.sens, aes(y = fit), size = 1.5, 
    col = "palegreen3") + 
geom_ribbon(data = oil_dat_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.3) + geom_line(data = oil_dat_PFAS.sens, aes(y = fit), size = 1.5, 
    col = "goldenrod") + 
geom_ribbon(data = water_dat_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), 
    alpha = 0.3) + geom_line(data = water_dat_PFAS.sens, aes(y = fit), size = 1.5, 
    col = "dodgerblue") + 
geom_point(aes(size = (1/sqrt(var_lnRR)), fill = Cooking_Category), shape = 21, alpha = 0.8) + 
    scale_fill_manual(values = c("#55C667FF", "goldenrod2", "dodgerblue3")) + labs(x = "PFAS carbon chain length", 
    y = "lnRR", size = "Precison (1/SE)") + scale_size_continuous(range = c(1, 10)) + 
    theme_bw() + geom_hline(yintercept = 0, linetype = 2, colour = "black", alpha = 0.5) + 
    theme(text = element_text(size = 18, colour = "black", hjust = 0.5), legend.text = element_text(size = 14), 
        legend.position = c(0, 0), legend.justification = c(0, 0), legend.background = element_blank(), 
        legend.direction = "horizontal", legend.title = element_text(size = 15), 
        panel.border = element_rect(colour = "black", fill = NA, size = 1.2))

Predictions with the full model
##### Oil based
full_model_oil_PFAS.sens<- run_model_oil(oil_dat.sens, ~ scale(Temperature_in_Celsius) +
                                           scale(Length_cooking_time_in_s)+
                                           PFAS_carbon_chain +
                                           scale(log(Volume_liquid_ml)))
pred_oil_PFAS.sens<-predict.rma(full_model_oil_PFAS.sens, addx=TRUE, newmods=cbind(0,0, c(3:14),0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of PFAS carbon chain
pred_oil_PFAS.sens<-as.data.frame(pred_oil_PFAS.sens)
pred_oil_PFAS.sens$PFAS_carbon_chain=pred_oil_PFAS.sens$X.PFAS_carbon_chain
pred_oil_PFAS.sens<-left_join(oil_dat.sens, pred_oil_PFAS.sens, by="PFAS_carbon_chain")


##### Water based
full_model_water_PFAS.sens<- run_model_water(water_dat.sens, ~ scale(Temperature_in_Celsius) +
                                           scale(Length_cooking_time_in_s)+
                                           PFAS_carbon_chain +
                                           scale(log(Volume_liquid_ml)))

pred_water_PFAS.sens<-predict.rma(full_model_water_PFAS.sens, addx=TRUE, newmods=cbind(0, c(3:14),0)) # Set all predictors to their mean (mean =0 when z-transformed) and set the range of values of cooking time
pred_water_PFAS.sens<-as.data.frame(pred_water_PFAS.sens)
pred_water_PFAS.sens$PFAS_carbon_chain=pred_water_PFAS.sens$X.PFAS_carbon_chain
pred_water_PFAS.sens<-left_join(water_dat.sens, pred_water_PFAS.sens, by="PFAS_carbon_chain")

##### No liquid 

full_model_dry_PFAS.sens<- run_model_dry(dry_dat.sens, ~ PFAS_carbon_chain)

pred_dry_PFAS.sens<-predict.rma(full_model_dry_PFAS.sens, addx=TRUE)
pred_dry_PFAS.sens<-as.data.frame(pred_dry_PFAS.sens)
pred_dry_PFAS.sens$PFAS_carbon_chain=pred_dry_PFAS.sens$X.PFAS_carbon_chain
pred_dry_PFAS.sens<-left_join(dry_dat.sens, pred_dry_PFAS.sens, by="PFAS_carbon_chain")




ggplot(dat.sens,aes(x = PFAS_carbon_chain, y = lnRR, fill=Cooking_Category)) +
  
    
       geom_ribbon(data=pred_dry_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_dry_PFAS.sens,aes(y = pred), size = 1.5, col="palegreen3")+  
  
  
       geom_ribbon(data=pred_water_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.2) +
       geom_line(data=pred_water_PFAS.sens,aes(y = pred), size = 1.5, col="dodgerblue")+  
  
  
       geom_ribbon(data=pred_oil_PFAS.sens, aes(ymin = ci.lb, ymax = ci.ub, color = NULL), alpha = 0.3) +
       geom_line(data=pred_oil_PFAS.sens,aes(y = pred), size = 1.5, col="goldenrod")+  
  
  
       geom_point(aes(size=(1/sqrt(var_lnRR)), fill=Cooking_Category), shape=21, alpha=0.8) +
       scale_fill_manual(values=c("#55C667FF", "goldenrod2", "dodgerblue3"))+
       labs(x = "PFAS carbon chain length", y = "lnRR", size = "Precison (1/SE)") + 
  scale_size_continuous(range=c(1,10))+
  theme_bw() +
  geom_hline(yintercept = 0,linetype = 2, colour = "black",alpha=0.5)+   # horizontal line at lnRR = 0
  theme(text = element_text(size = 18, colour = "black", hjust = 0.5), # change font sizes and legend position
          legend.text=element_text(size=14),
          legend.position=c(0,0), 
          legend.justification = c(0,0),
          legend.background = element_blank(), 
          legend.direction="horizontal",
          legend.title = element_text(size=15), 
          panel.border=element_rect(colour="black", fill=NA, size=1.2))

Funnel plot

funnel(mod.sens, yaxis = "seinv")